library(sp)
library(gstat)
library(sf)
library(mapview)
library(tidyverse)
data(meuse)
head(meuse)
## x y cadmium copper lead zinc elev dist om ffreq soil lime
## 1 181072 333611 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1
## 2 181025 333558 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1
## 3 181165 333537 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1
## 4 181298 333484 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0
## 5 181307 333330 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0
## 6 181390 333260 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0
## landuse dist.m
## 1 Ah 50
## 2 Ah 30
## 3 Ah 150
## 4 Ga 270
## 5 Ah 380
## 6 Ga 470
ggplot(data = meuse) + geom_point(aes(x, y))
data(meuse.grid)
head(meuse.grid)
## x y part.a part.b dist soil ffreq
## 1 181180 333740 1 0 0.0000000 1 1
## 2 181140 333700 1 0 0.0000000 1 1
## 3 181180 333700 1 0 0.0122243 1 1
## 4 181220 333700 1 0 0.0434678 1 1
## 5 181100 333660 1 0 0.0000000 1 1
## 6 181140 333660 1 0 0.0122243 1 1
ggplot(data = meuse.grid) + geom_point(aes(x, y))
summary(meuse)
## x y cadmium copper
## Min. :178605 Min. :329714 Min. : 0.200 Min. : 14.00
## 1st Qu.:179371 1st Qu.:330762 1st Qu.: 0.800 1st Qu.: 23.00
## Median :179991 Median :331633 Median : 2.100 Median : 31.00
## Mean :180005 Mean :331635 Mean : 3.246 Mean : 40.32
## 3rd Qu.:180630 3rd Qu.:332463 3rd Qu.: 3.850 3rd Qu.: 49.50
## Max. :181390 Max. :333611 Max. :18.100 Max. :128.00
##
## lead zinc elev dist
## Min. : 37.0 Min. : 113.0 Min. : 5.180 Min. :0.00000
## 1st Qu.: 72.5 1st Qu.: 198.0 1st Qu.: 7.546 1st Qu.:0.07569
## Median :123.0 Median : 326.0 Median : 8.180 Median :0.21184
## Mean :153.4 Mean : 469.7 Mean : 8.165 Mean :0.24002
## 3rd Qu.:207.0 3rd Qu.: 674.5 3rd Qu.: 8.955 3rd Qu.:0.36407
## Max. :654.0 Max. :1839.0 Max. :10.520 Max. :0.88039
##
## om ffreq soil lime landuse dist.m
## Min. : 1.000 1:84 1:97 0:111 W :50 Min. : 10.0
## 1st Qu.: 5.300 2:48 2:46 1: 44 Ah :39 1st Qu.: 80.0
## Median : 6.900 3:23 3:12 Am :22 Median : 270.0
## Mean : 7.478 Fw :10 Mean : 290.3
## 3rd Qu.: 9.000 Ab : 8 3rd Qu.: 450.0
## Max. :17.000 (Other):25 Max. :1000.0
## NA's :2 NA's : 1
ggplot(data = meuse) + geom_point(aes(x, y, size=zinc))
class(meuse)
## [1] "data.frame"
class(meuse.grid)
## [1] "data.frame"
meuse2 <- st_as_sf(meuse, coords = c("x", "y"), crs = 28992)
meuse.grid2 <- st_as_sf(meuse.grid, coords = c("x", "y"),
crs = 28992)
class(meuse2)
## [1] "sf" "data.frame"
class(meuse.grid2)
## [1] "sf" "data.frame"
head(meuse2)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181025 ymin: 333260 xmax: 181390 ymax: 333611
## Projected CRS: Amersfoort / RD New
## cadmium copper lead zinc elev dist om ffreq soil lime landuse dist.m
## 1 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1 Ah 50
## 2 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1 Ah 30
## 3 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1 Ah 150
## 4 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0 Ga 270
## 5 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0 Ah 380
## 6 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0 Ga 470
## geometry
## 1 POINT (181072 333611)
## 2 POINT (181025 333558)
## 3 POINT (181165 333537)
## 4 POINT (181298 333484)
## 5 POINT (181307 333330)
## 6 POINT (181390 333260)
head(meuse.grid2)
## Simple feature collection with 6 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## part.a part.b dist soil ffreq geometry
## 1 1 0 0.0000000 1 1 POINT (181180 333740)
## 2 1 0 0.0000000 1 1 POINT (181140 333700)
## 3 1 0 0.0122243 1 1 POINT (181180 333700)
## 4 1 0 0.0434678 1 1 POINT (181220 333700)
## 5 1 0 0.0000000 1 1 POINT (181100 333660)
## 6 1 0 0.0122243 1 1 POINT (181140 333660)
mapview(meuse2, zcol = "zinc", map.types = "CartoDB.Voyager")
mapview(meuse.grid2, map.types = "CartoDB.Voyager")
ggplot(meuse, aes(x=dist, y=zinc)) + geom_point()
ggplot(meuse, aes(x=dist, y=log(zinc))) + geom_point()
ggplot(meuse, aes(x=sqrt(dist), y=log(zinc))) + geom_point()
library(GGally)
ggpairs(meuse)
ggpairs(meuse[,3:9])
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
lm <- lm(log(zinc)~sqrt(dist), meuse)
meuse$residuals <- residuals(lm)
meuse$fitted <- fitted(lm)
meuse$fitted2 <- predict(lm, meuse)
meuse$fitted.s <- predict(lm, meuse) - mean(predict(lm, meuse))
head(meuse)
## x y cadmium copper lead zinc elev dist om ffreq soil lime
## 1 181072 333611 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1
## 2 181025 333558 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1
## 3 181165 333537 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1
## 4 181298 333484 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0
## 5 181307 333330 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0
## 6 181390 333260 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0
## landuse dist.m residuals fitted fitted2 fitted.s
## 1 Ah 50 0.02907908 6.900438 6.900438 1.014661840
## 2 Ah 30 0.32712956 6.712531 6.712531 0.826754936
## 3 Ah 150 0.28533439 6.176134 6.176134 0.290357936
## 4 Ga 270 -0.33385786 5.882934 5.882934 -0.002841905
## 5 Ah 380 -0.05778586 5.652497 5.652497 -0.233278608
## 6 Ga 470 0.18211082 5.456244 5.456244 -0.429532005
library(viridis)
## Loading required package: viridisLite
ggplot(meuse, aes(x=x, y=y, col=fitted))+geom_point()+scale_color_viridis()
ggplot(meuse, aes(x=x, y=y, col=residuals))+geom_point()+scale_color_viridis()
ggplot(meuse, aes(x=x, y=y, col=fitted.s))+geom_point()+scale_color_viridis()
library(stars) |> suppressPackageStartupMessages()
library(gstat)
idw_result <- idw(zinc~1, meuse2, meuse.grid2)
## [inverse distance weighted interpolation]
idw_result
## Simple feature collection with 3103 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 178460 ymin: 329620 xmax: 181540 ymax: 333740
## Projected CRS: Amersfoort / RD New
## First 10 features:
## var1.pred var1.var geometry
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
## 7 667.7526 NA POINT (181180 333660)
## 8 604.4254 NA POINT (181220 333660)
## 9 1003.8711 NA POINT (181060 333620)
## 10 945.0017 NA POINT (181100 333620)
# Convert IDW output to a data frame
idw_df <- as.data.frame(idw_result)
head(idw_df)
## var1.pred var1.var geometry
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
colnames(idw_df) <- c("x", "y", "zinc_pred")
head(idw_df)
## x y zinc_pred
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
ggplot(data = meuse2) + geom_sf(data = idw_result, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
hscat(log(zinc)~1, meuse2, (0:9)*100)
vc <- variogram(log(zinc) ~ 1, meuse2, cloud = TRUE)
plot(vc)
v <- variogram(log(zinc) ~ 1, data = meuse2)
plot(v)
vgm()
## short long
## 1 Nug Nug (nugget)
## 2 Exp Exp (exponential)
## 3 Sph Sph (spherical)
## 4 Gau Gau (gaussian)
## 5 Exc Exclass (Exponential class/stable)
## 6 Mat Mat (Matern)
## 7 Ste Mat (Matern, M. Stein's parameterization)
## 8 Cir Cir (circular)
## 9 Lin Lin (linear)
## 10 Bes Bes (bessel)
## 11 Pen Pen (pentaspherical)
## 12 Per Per (periodic)
## 13 Wav Wav (wave)
## 14 Hol Hol (hole)
## 15 Log Log (logarithmic)
## 16 Pow Pow (power)
## 17 Spl Spl (spline)
## 18 Leg Leg (Legendre)
## 19 Err Err (Measurement error)
## 20 Int Int (Intercept)
show.vgms(par.strip.text = list(cex = 0.75))
vg.fit1 <- vgm(psill = 0.5, model = "Sph",
range = 900, nugget = 0.1)
vg.fit1
## model psill range
## 1 Nug 0.1 0
## 2 Sph 0.5 900
plot(v, vg.fit1 , cutoff = 1000, cex = 1.5)
fv <- fit.variogram(object = v,
model = vgm(psill = 0.5, model = "Sph",
range = 900, nugget = 0.1))
fv
## model psill range
## 1 Nug 0.05066017 0.0000
## 2 Sph 0.59060556 897.0044
attr(fv, "SSErr")
## [1] 9.011194e-06
plot(v, fv, cex = 1.5)
fv2 <- fit.variogram(object = v,
model = vgm(psill = 0.6, model = "Sph",range = 900, nugget = 0.1))
fv2
## model psill range
## 1 Nug 0.05066522 0.0000
## 2 Sph 0.59061054 897.0412
attr(fv2, "SSErr")
## [1] 9.011195e-06
plot(v, fv2, cex = 1.5)
fv3 <- fit.variogram(object = v,
model = vgm(psill = 1, model = "Exp",range = 800, nugget = 0.1))
fv3
## model psill range
## 1 Nug 0.0000000 0.0000
## 2 Exp 0.7186519 449.7572
attr(fv3, "SSErr")
## [1] 1.628328e-05
plot(v, fv3, cex = 1.5)
library(ggplot2)
library(viridis)
k <- gstat(formula = log(zinc) ~ 1, data = meuse2, model = fv)
kpred <- predict(k, meuse.grid2)
## [using ordinary kriging]
head(kpred)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 1 6.499619 0.3198083 POINT (181180 333740)
## 2 6.622352 0.2520197 POINT (181140 333700)
## 3 6.505162 0.2729850 POINT (181180 333700)
## 4 6.387586 0.2955288 POINT (181220 333700)
## 5 6.764491 0.1779405 POINT (181100 333660)
## 6 6.635511 0.2022032 POINT (181140 333660)
ggplot() + geom_sf(data = kpred, aes(color = var1.pred)) +
geom_sf(data = meuse2) +
scale_color_viridis(name = "log(zinc)") + theme_bw()
ggplot() + geom_sf(data = kpred, aes(color = var1.var)) +
geom_sf(data = meuse2) +
scale_color_viridis(name = "variance") + theme_bw()
vg.dir <- variogram(log(zinc)~1, alpha = c(0, 45, 90, 135), meuse2)
plot(vg.dir)
The function call vgm(psill=0.59, model = “Sph”, range = 1200, nugget = 0.05, anis = c(45, 0.4)) in R creates a variogram model using the vgm() function from the gstat package. Here’s what each parameter means:
psill = 0.59: This is the partial sill, representing the difference between the sill (total variance) and the nugget effect. It defines the maximum semivariance minus the nugget.
model = “Sph”: The model type is Spherical, one of the commonly used variogram models. It describes how spatial correlation decreases with distance.
range = 1200: This is the range, the distance at which the spatial correlation drops to near zero (or 95% of the sill for the spherical model). Beyond this distance, observations are considered uncorrelated.
nugget = 0.05: The nugget effect, representing spatial variability at very small scales or measurement errors. It accounts for discontinuity at very short distances.
anis = c(45, 0.4): Specifies anisotropy, meaning the spatial correlation differs by direction.
45 indicates the direction (angle in degrees from the x-axis) where the correlation is strongest.
0.4 is the anisotropy ratio, meaning correlation decays faster in the perpendicular direction. A value of 0.4 means that in the perpendicular direction, the range is only 40% of the main direction’s range.
fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)
This variogram model describes spatial dependence using a spherical variogram with anisotropy. The correlation extends up to 1200 units in the primary direction (45°), but only 480 units (1200 × 0.4) in the perpendicular direction. There is a small nugget effect (0.05), indicating slight short-scale variation or measurement noise.
Another way to detect directional dependence
vg.map <- variogram(log(zinc)~1, meuse2, map=T, cutoff=1000, width=100)
plot(vg.map, threshold=5)
vg.map <- variogram(log(zinc) ~ 1, meuse, map = TRUE, cutoff = 1000, width = 100)This code computes and visualizes the variogram map of the log-transformed zinc concentrations in the Meuse dataset.
variogram(log(zinc) ~ 1, meuse, …)
Computes the experimental semivariogram for log-transformed zinc concentrations. The ~ 1 formula means it calculates the variogram without considering covariates (i.e., only based on spatial distances).map = TRUE
Instead of returning a traditional one-dimensional variogram, this creates a spatial variogram map, which helps identify anisotropy (directional dependence of spatial variation).
cutoff = 1000
Defines the maximum lag distance (1,000 meters). Pairs of points separated by distances greater than this are ignored.
width = 100
Sets the bin width for grouping distances. Distance intervals of 100 meters are used when computing the variogram.
plot(vg.map, …)
Plots the spatial variogram map, where each pixel represents a directional semivariance value for a given spatial lag.
threshold = 5
Sets a threshold to filter high semivariance values to make the plot more interpretable.
log(zinc)~sqrt(dist)vgreg.est <- variogram(log(zinc)~sqrt(dist), meuse2)
plot(vgreg.est)
vgreg.fit <- fit.variogram(vgreg.est, vgm(0.2, "Sph", 800, 0.05))
vgreg.fit
## model psill range
## 1 Nug 0.07978675 0.0000
## 2 Sph 0.14902954 871.8744
plot(vgreg.est, vgreg.fit, cex = 1.5)
vgreg.dir <- variogram(log(zinc)~sqrt(dist), meuse2, alpha=c(0, 45, 90, 135))
plot(vgreg.dir)
plot(vgreg.dir, vgreg.fit, cex = 1.5)
The directional dependence is much less obvious in this case.
Another way to show this is variogram maps
vgreg.map <- variogram(log(zinc)~sqrt(dist), meuse2, map=T, cutoff=1000, width=100)
plot(vgreg.map, threshold=5)
Note: This is an illustration of the codes. In the current implementation, anisotropy is ignored, meaning the model assumes the spatial correlation is the same in all directions. If anisotropy exists (i.e., the spatial dependence varies by direction), we need to incorporate it explicitly in the variogram model.
zinc_mean <- mean(log(meuse$zinc)) # Required for SK
v_sk_est <- variogram(log(zinc) ~ 1, meuse2, cloud=F)
v_sk_est
## np dist gamma dir.hor dir.ver id
## 1 57 79.29244 0.1234479 0 0 var1
## 2 299 163.97367 0.2162185 0 0 var1
## 3 419 267.36483 0.3027859 0 0 var1
## 4 457 372.73542 0.4121448 0 0 var1
## 5 547 478.47670 0.4634128 0 0 var1
## 6 533 585.34058 0.5646933 0 0 var1
## 7 574 693.14526 0.5689683 0 0 var1
## 8 564 796.18365 0.6186769 0 0 var1
## 9 589 903.14650 0.6471479 0 0 var1
## 10 543 1011.29177 0.6915705 0 0 var1
## 11 500 1117.86235 0.7033984 0 0 var1
## 12 477 1221.32810 0.6038770 0 0 var1
## 13 452 1329.16407 0.6517158 0 0 var1
## 14 457 1437.25620 0.5665318 0 0 var1
## 15 415 1543.20248 0.5748227 0 0 var1
plot(v_sk_est, main="Sample Variogram")
v_sk_fit <- fit.variogram(v_sk_est, model = vgm(1, "Sph", 900, 0.1))
v_sk_fit
## model psill range
## 1 Nug 0.05066243 0.0000
## 2 Sph 0.59060780 897.0209
plot(v_sk_est, v_sk_fit)
attr(v_sk_fit, "SSErr")
## [1] 9.011194e-06
krige_sk <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_sk_fit, beta = zinc_mean)
## [using simple kriging]
head(krige_sk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 1 6.447758 0.3160027 POINT (181180 333740)
## 2 6.585255 0.2500732 POINT (181140 333700)
## 3 6.465116 0.2707163 POINT (181180 333700)
## 4 6.343498 0.2927786 POINT (181220 333700)
## 5 6.741960 0.1772242 POINT (181100 333660)
## 6 6.610664 0.2013310 POINT (181140 333660)
ggplot(data = meuse2) + geom_sf(data = krige_sk, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
ggplot(data = meuse2) + geom_sf(data = krige_sk, aes(color = var1.var)) +
geom_sf() +
scale_color_viridis() + theme_bw()
v_ok <- variogram(log(zinc) ~ 1, meuse2)
v_ok_fit <- fit.variogram(v_ok, model = vgm(1, "Sph", 900, 0.1))
krige_ok <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_ok_fit)
## [using ordinary kriging]
head(krige_ok)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 1 6.499624 0.3198084 POINT (181180 333740)
## 2 6.622356 0.2520205 POINT (181140 333700)
## 3 6.505166 0.2729855 POINT (181180 333700)
## 4 6.387590 0.2955290 POINT (181220 333700)
## 5 6.764492 0.1779424 POINT (181100 333660)
## 6 6.635513 0.2022045 POINT (181140 333660)
ggplot(data = meuse2) + geom_sf(data = krige_ok, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
ggplot(data = meuse2) + geom_sf(data = krige_ok, aes(color = var1.var)) +
geom_sf() +
scale_color_viridis() + theme_bw()
v_uk <- variogram(log(zinc) ~ sqrt(dist), meuse2)
v_uk_fit <- fit.variogram(v_uk, model = vgm(1, "Sph", 900, 0.1))
krige_uk <- krige(log(zinc) ~ sqrt(dist), meuse2, meuse.grid2, model = v_uk_fit)
## [using universal kriging]
head(krige_uk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 1 7.071054 0.1683803 POINT (181180 333740)
## 2 7.088269 0.1518497 POINT (181140 333700)
## 3 6.785167 0.1537665 POINT (181180 333700)
## 4 6.512936 0.1576136 POINT (181220 333700)
## 5 7.104999 0.1343480 POINT (181100 333660)
## 6 6.804171 0.1368075 POINT (181140 333660)
ggplot(data = meuse2) + geom_sf(data = krige_uk, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
ggplot(data = meuse2) + geom_sf(data = krige_uk, aes(color = var1.var)) +
geom_sf() +
scale_color_viridis() + theme_bw()
library(gstat)
# Compute directional variogram with different angles (0, 45, 90, 135 degrees)
v_aniso <- variogram(log(zinc) ~ 1, meuse2, alpha = c(0, 45, 90, 135))
# Plot the directional variograms
plot(v_aniso)
The partial sill or psill represents the variance of the data (the overall range of spatial variation). In this case, it indicates that the variance of the spatial process is 0.59 after subtracting the nugget (if any).
This specifies the model of spatial correlation. “Sph” stands for the spherical model of variograms, which assumes the correlation increases with distance until it reaches a maximum (the range), after which it flattens out. Other models include “Exp” (exponential) and “Gau” (Gaussian), each representing different types of spatial correlation decay.
The range refers to the distance at which the spatial correlation becomes negligible (i.e., it reaches its sill). After this distance, there is no longer a significant correlation between points. Here, the range is set to 1200 units, meaning that beyond 1200 units, the spatial dependence is assumed to be zero.
The nugget represents the variance at very small distances, or the “micro-scale” variability that isn’t captured by the spatial model. This is usually attributed to measurement errors or small-scale unmodeled variation.
In this case, the nugget is 0.05, meaning there’s a small amount of unexplained variability at the smallest distances. anis = c(45, 0.4):
This specifies the anisotropy in the variogram model:
45: The angle in degrees that defines the major axis of the anisotropy. This means the range in this direction (45°) is different from the range in the perpendicular direction (90°).
0.4: The anisotropy ratio (major range / minor range). This means the range in the major direction (45°) is 0.4 times the range in the minor direction (perpendicular to 45°). In other words, the spatial correlation is stronger in the direction of the major axis (45°) and weaker in the perpendicular direction.
Partial Sill (0.59): The variance of the spatial process after accounting for the nugget. Spherical Model (“Sph”): Specifies the type of spatial correlation model used. Range (1200): The distance at which spatial correlation becomes negligible. Nugget (0.05): The small-scale variability (or measurement error) at very short distances. Anisotropy: A directional dependence in the spatial correlation: 45° direction has a range of 1200 units. The minor range (perpendicular to 45°) is 1200 / 0.4 = 3000 units.
# Fit anisotropic variogram model with a specified range and anisotropy ratio
fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)
v_aniso_fit <- fit.variogram(v_aniso,
model = fit.ani)
v_aniso_fit
## model psill range ang1 anis1
## 1 Nug 0.05892577 0.000 0 1.0
## 2 Sph 0.58465419 1742.857 45 0.4
attr(v_aniso_fit, "SSErr")
## [1] 0.0005215035
krige_aniso_sk <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_aniso_fit, beta = mean(log(meuse$zinc)))
## [using simple kriging]
ggplot(data = meuse2) + geom_sf(data = krige_aniso_sk, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
ggplot(data = meuse2) + geom_sf(data = krige_aniso_sk, aes(color = var1.var)) +
geom_sf() +
scale_color_viridis() + theme_bw()
set.seed(42) # For reproducibility
# Sample indices for training set (70% of the data)
train_indices <- sample(1:nrow(meuse2), size = 0.7 * nrow(meuse2))
# Create training set and test set
train_data <- meuse2[train_indices, ]
test_data <- meuse2[-train_indices, ]
# Check the first few rows of both sets
head(train_data)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 178875 ymin: 329714 xmax: 180270 ymax: 331955
## Projected CRS: Amersfoort / RD New
## cadmium copper lead zinc elev dist om ffreq soil lime landuse
## 50 1.7 24 112 282 9.420 0.5542890 4.5 1 2 0 Bw
## 66 7.4 72 181 801 7.360 0.0122243 15.2 1 1 1 W
## 158 2.1 31 119 342 8.429 0.2770900 6.5 3 1 0 Ah
## 83 3.1 39 237 593 6.320 0.2009760 7.0 1 1 1 Ah
## 151 3.8 39 179 612 7.300 0.0537723 8.8 3 1 0 W
## 128 0.2 25 94 253 8.779 0.1030240 8.1 2 1 1 Tv
## dist.m geometry
## 50 660 POINT (180270 331707)
## 66 20 POINT (179334 331366)
## 158 350 POINT (178875 330311)
## 83 260 POINT (179110 330758)
## 151 80 POINT (179245 329714)
## 128 70 POINT (179642 331955)
head(test_data)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 180625 ymin: 332664 xmax: 181191 ymax: 333370
## Projected CRS: Amersfoort / RD New
## cadmium copper lead zinc elev dist om ffreq soil lime landuse dist.m
## 7 3.2 31 132 346 8.217 0.1900940 9.2 1 2 0 Ah 240
## 11 1.4 25 86 189 9.015 0.3151160 6.4 1 2 0 Fh 400
## 12 1.8 25 97 251 9.073 0.2281230 9.0 1 1 0 Ag 300
## 17 7.0 74 133 606 7.160 0.0122243 16.0 1 1 1 W 10
## 19 8.7 69 207 735 7.020 0.0000000 13.7 1 1 1 W 10
## 23 2.9 35 110 343 8.830 0.1139320 7.2 1 1 1 Ag 160
## geometry
## 7 POINT (181165 333370)
## 11 POINT (181191 333115)
## 12 POINT (181032 333031)
## 17 POINT (180763 333104)
## 19 POINT (180625 332847)
## 23 POINT (180704 332664)
v_uk <- variogram(log(zinc) ~ sqrt(dist), train_data )
v_uk_fit <- fit.variogram(v_uk, model = vgm(1, "Sph", 900, 0.1))
krige_uk <- krige(log(zinc) ~ sqrt(dist), train_data, test_data, model = v_uk_fit)
## [using universal kriging]
head(krige_uk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 180625 ymin: 332664 xmax: 181191 ymax: 333370
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 7 5.823064 0.1278743 POINT (181165 333370)
## 11 5.373305 0.1255082 POINT (181191 333115)
## 12 5.572201 0.1313608 POINT (181032 333031)
## 17 6.568145 0.1355524 POINT (180763 333104)
## 19 6.901494 0.1330293 POINT (180625 332847)
## 23 6.047125 0.1216261 POINT (180704 332664)
actual <- log(test_data$zinc)
predicted <- log(krige_uk$var1.pred)
dfuk <- data.frame(actual, predicted)
ggplot(dfuk, aes(x=actual, y=predicted)) + geom_point()
dfuk$resid <- dfuk$actual - dfuk$predicted
ggplot(dfuk, aes(x=resid)) + geom_histogram(col="white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.